Set Up¶

In [2]:
# Import Libraries

import json                 # Reading and writing JSON
from glob import glob       # File pattern matching
import os                   # Operating system access
import pathlib              # File navigation
import holoviews as hv      # Interactable plot
import panel as pn          # Plot formatting
pn.extension()              # Plot formatting
hv.extension('bokeh')       # Interactable plot
import numpy as np          # Numerical computing


import earthpy              # Work with geospatial data
import earthpy.api.appeears as eaapp    # Access earthpy api
import xarray as xr         # Multi-dimensional arrays
import hvplot.xarray        # HVPlot and xarray
import rioxarray as rxr     # Raster for xarray

import matplotlib.pyplot as plt    # Plotting
import statsmodels.formula.api as smf   # Statistical models
import pandas as pd         # Work with tabular data
import geopandas as gpd     # Wowk with geospatial data
import hvplot.pandas        # Plot geospatial data
In [3]:
# Establish project directory 

project = earthpy.Project(
    'Gila River Vegetation', dirname='NDVI_data')
project.get_data()

Establish Boundaries¶

In [43]:
# Load in the boundary data

aitsn_gdf = gpd.read_file(project.project_dir / 'tl_2020_us_aitsn')

print(type(aitsn_gdf))
aitsn_gdf.head()
<class 'geopandas.geodataframe.GeoDataFrame'>
Out[43]:
AIANNHCE TRSUBCE TRSUBNS GEOID NAME NAMELSAD LSAD CLASSFP MTFCC FUNCSTAT ALAND AWATER INTPTLAT INTPTLON geometry
0 2430 653 02419073 2430653 Red Valley Red Valley Chapter T2 D7 G2300 A 922036695 195247 +36.6294607 -109.0550394 POLYGON ((-109.2827 36.64644, -109.28181 36.65...
1 2430 665 02419077 2430665 Rock Point Rock Point Chapter T2 D7 G2300 A 720360268 88806 +36.6598701 -109.6166836 POLYGON ((-109.85922 36.49859, -109.85521 36.5...
2 2430 675 02419081 2430675 Rough Rock Rough Rock Chapter T2 D7 G2300 A 364475668 216144 +36.3976971 -109.7695183 POLYGON ((-109.93053 36.40672, -109.92923 36.4...
3 2430 325 02418975 2430325 Indian Wells Indian Wells Chapter T2 D7 G2300 A 717835323 133795 +35.3248534 -110.0855000 POLYGON ((-110.24222 35.36327, -110.24215 35.3...
4 2430 355 02418983 2430355 Kayenta Kayenta Chapter T2 D7 G2300 A 1419241065 1982848 +36.6884391 -110.3045616 POLYGON ((-110.56817 36.73489, -110.56603 36.7...
In [5]:
# Plot GRIC and SRIC boundary overview

overview_plot = aitsn_gdf.hvplot(
    geo=True, tiles='CartoLight',
    frame_width=500,
    legend=False, fill_color=None, fill_alpha = .5,
    line_color='blue',
    colorbar = False,
    hover_cols='all',
    xlim=(-112.4, -111.4),
    ylim=(32.9, 33.6),
    title = 'GRIC & SRIC Boundary Overview'
)

overview_plot
Out[5]:
In [6]:
# Save the plot as an HTML file

hv.save(overview_plot, 'img/overview_plot.html', fmt='html')
WARNING:bokeh.core.validation.check:W-1005 (FIXED_SIZING_MODE): 'fixed' sizing mode requires width and height to be set: figure(id='44422e10-b12c-4191-802e-c44f25f55e8f', ...)
In [7]:
# Plot GRIC Boundary and set up gdf

gric_gdf = aitsn_gdf.loc[aitsn_gdf.AIANNHCE=='1310'].dissolve()

gric_bound_plot = gric_gdf.hvplot(
    geo=True, tiles='CartoLight',
    fill_color='green', line_color='Blue',
    fill_alpha=.3,  # must be b/w 0-1
    title='Gila River Indian Community',
    frame_width=500)

gric_bound_plot
Out[7]:
In [8]:
# Plot SRIC Boundary and set up gdf

sric_gdf = aitsn_gdf.loc[aitsn_gdf.AIANNHCE=='3340'].dissolve()

sric_bound_plot = sric_gdf.hvplot(
    geo=True, tiles='CartoLight',
    fill_color='green', line_color='Blue',
    fill_alpha=.3,  # must be b/w 0-1
    title='Salt River Indian Community',
    frame_width=500)

sric_bound_plot
Out[8]:
In [9]:
# Save both boundary plots

hv.save(gric_bound_plot, 'img/gric_bound_plot.html', fmt='html')
hv.save(sric_bound_plot, 'img/sric_bound_plot.html', fmt='html')
WARNING:bokeh.core.validation.check:W-1005 (FIXED_SIZING_MODE): 'fixed' sizing mode requires width and height to be set: figure(id='ca7dbb49-e1f2-452c-b4ba-b2ab2bd90c48', ...)
WARNING:bokeh.core.validation.check:W-1005 (FIXED_SIZING_MODE): 'fixed' sizing mode requires width and height to be set: figure(id='0f6af472-c0d0-4107-abca-dd4d2e9b0375', ...)

Download NDVI Data¶

The GRIC NDVI data is included in the download for the AITSN Boundary data used above, and the earthpy API can be used to download the SRIC NDVI data using the NASA Earthdata API

In [10]:
# Set up download key

ndvi_downloader = eaapp.AppeearsDownloader(

    ### give your download a name
    download_key = "salt-river-ndvi",

    ### tell it to put the data in your project that you already defined
    project = project,

    ### specify the MODIS product you want
    product = 'MOD13Q1.061',
    layer = '_250m_16_days_NDVI',

    ### choose a start date and end data
    start_date = "05-24",
    end_date = "08-29",

    ### recurring means you want those dates over multiple years
    recurring = True,

    ### specify the range of years you want
    year_range = [2001, 2022],

    ### specify the polygon you want to get NDVI data for
    polygon = sric_gdf
)
In [11]:
# Download files if not already saved

ndvi_downloader.download_files(cache=True)
Out[11]:
<generator object Path.rglob at 0x788728006bd0>
In [44]:
# Get a sorted list of file path for only the 'gila_river-ndvi' folder

gila_dir = project.project_dir / 'gila-river-ndvi'

gric_paths = sorted(list(gila_dir.rglob('*NDVI*.tif')))

# Display the first and last three files paths to check the pattern
gric_paths[:3], gric_paths[-3:]
Out[44]:
([PosixPath('/workspaces/data/ndvi_data/gila-river-ndvi/MOD13Q1.061_2001137_to_2022244/MOD13Q1.061__250m_16_days_NDVI_doy2001145000000_aid0001.tif'),
  PosixPath('/workspaces/data/ndvi_data/gila-river-ndvi/MOD13Q1.061_2001137_to_2022244/MOD13Q1.061__250m_16_days_NDVI_doy2001161000000_aid0001.tif'),
  PosixPath('/workspaces/data/ndvi_data/gila-river-ndvi/MOD13Q1.061_2001137_to_2022244/MOD13Q1.061__250m_16_days_NDVI_doy2001177000000_aid0001.tif')],
 [PosixPath('/workspaces/data/ndvi_data/gila-river-ndvi/MOD13Q1.061_2001137_to_2022244/MOD13Q1.061__250m_16_days_NDVI_doy2022209000000_aid0001.tif'),
  PosixPath('/workspaces/data/ndvi_data/gila-river-ndvi/MOD13Q1.061_2001137_to_2022244/MOD13Q1.061__250m_16_days_NDVI_doy2022225000000_aid0001.tif'),
  PosixPath('/workspaces/data/ndvi_data/gila-river-ndvi/MOD13Q1.061_2001137_to_2022244/MOD13Q1.061__250m_16_days_NDVI_doy2022241000000_aid0001.tif')])
In [45]:
# Get a sorted list of file path for only the 'satl-river-ndvi' folder

salt_dir = project.project_dir / 'salt-river-ndvi'

sric_paths = sorted(list(salt_dir.rglob('*NDVI*.tif')))

# Display the first and last three files paths to check the pattern
sric_paths[:3], sric_paths[-3:]
Out[45]:
([PosixPath('/workspaces/data/ndvi_data/salt-river-ndvi/MOD13Q1.061_2001129_to_2022241/MOD13Q1.061__250m_16_days_NDVI_doy2001129000000_aid0001.tif'),
  PosixPath('/workspaces/data/ndvi_data/salt-river-ndvi/MOD13Q1.061_2001129_to_2022241/MOD13Q1.061__250m_16_days_NDVI_doy2001145000000_aid0001.tif'),
  PosixPath('/workspaces/data/ndvi_data/salt-river-ndvi/MOD13Q1.061_2001129_to_2022241/MOD13Q1.061__250m_16_days_NDVI_doy2001161000000_aid0001.tif')],
 [PosixPath('/workspaces/data/ndvi_data/salt-river-ndvi/MOD13Q1.061_2001129_to_2022241/MOD13Q1.061__250m_16_days_NDVI_doy2022209000000_aid0001.tif'),
  PosixPath('/workspaces/data/ndvi_data/salt-river-ndvi/MOD13Q1.061_2001129_to_2022241/MOD13Q1.061__250m_16_days_NDVI_doy2022225000000_aid0001.tif'),
  PosixPath('/workspaces/data/ndvi_data/salt-river-ndvi/MOD13Q1.061_2001129_to_2022241/MOD13Q1.061__250m_16_days_NDVI_doy2022241000000_aid0001.tif')])
In [46]:
# Compare datasets

print(len(sric_paths))
print(len(gric_paths))
171
154

The Salt River NDVI files included an extra day on the front end of some years (day 119). We will filter both file lists by the same date range later to ensure everything lines up.

Format NDVI Data¶

In [15]:
# Add a date dimension to each file from the file name

doy_start = -25
doy_end = -19

all_paths = {
    "gric": gric_paths,
    "sric": sric_paths
}

results = {
    "gric": [],
    "sric": []
}

for label, paths in all_paths.items():
    for path in paths:
        # Get date from file name
        doy = path.name[doy_start:doy_end]
        date = pd.to_datetime(doy, format='%Y%j')

        # Open dataset
        da = rxr.open_rasterio(path, mask_and_scale=True).squeeze()

        # Add date dimension and clean metadata
        da = da.assign_coords({'date': date})
        da = da.expand_dims({'date': 1})
        da.name = 'NDVI'

        # Append to the correct list
        results[label].append(da)

gric_das = results["gric"]
sric_das = results["sric"]

print(len(gric_das))
print(len(sric_das))
154
171
In [16]:
# Combine NDVI images from all dates

combined = {}

for label, da_list in results.items():
    combined[label] = xr.combine_by_coords(
        da_list, coords=['date'], compat='override'
    )

# Access the final outputs
gric_da = combined["gric"]
sric_da = combined["sric"]

print(sric_da)
print(gric_da)
<xarray.Dataset> Size: 6MB
Dimensions:      (date: 171, y: 71, x: 130)
Coordinates:
    band         int64 8B 1
  * x            (x) float64 1kB -111.9 -111.9 -111.9 ... -111.6 -111.6 -111.6
  * y            (y) float64 568B 33.58 33.58 33.58 33.58 ... 33.44 33.44 33.44
    spatial_ref  int64 8B 0
  * date         (date) datetime64[ns] 1kB 2001-01-12 2001-01-14 ... 2022-01-24
Data variables:
    NDVI         (date, y, x) float32 6MB 0.2532 0.2532 0.2532 ... 0.18 0.1789
<xarray.Dataset> Size: 48MB
Dimensions:      (date: 154, y: 203, x: 382)
Coordinates:
    band         int64 8B 1
  * x            (x) float64 3kB -112.3 -112.3 -112.3 ... -111.5 -111.5 -111.5
  * y            (y) float64 2kB 33.39 33.39 33.38 33.38 ... 32.97 32.97 32.97
    spatial_ref  int64 8B 0
  * date         (date) datetime64[ns] 1kB 2001-01-14 2001-01-16 ... 2022-01-24
Data variables:
    NDVI         (date, y, x) float32 48MB 0.8282 0.6146 ... 0.2146 0.2085
In [47]:
# Make sure each data array only contains data for dates present in both

common_dates = np.intersect1d(gric_da.date.values, sric_da.date.values)

gric_matched = gric_da.sel(date=common_dates)
sric_matched = sric_da.sel(date=common_dates)

len(gric_matched.date) - len(sric_matched.date)
Out[47]:
0
In [18]:
# Clip the data arrays to only include data within each boundary

gric_ndvi = gric_matched.rio.clip(gric_gdf.geometry, from_disk=True)

sric_ndvi = sric_matched.rio.clip(sric_gdf.geometry, from_disk=True)
In [19]:
# Calculate spatial means for each dataset

gric_mean = gric_ndvi.mean(dim=['x','y'])
sric_mean = sric_ndvi.mean(dim=['x','y'])
In [48]:
# Check everything looks as expected

gric_mean
Out[48]:
<xarray.Dataset> Size: 2kB
Dimensions:      (date: 154)
Coordinates:
    band         int64 8B 1
  * date         (date) datetime64[ns] 1kB 2001-01-14 2001-01-16 ... 2022-01-24
    spatial_ref  int64 8B 0
Data variables:
    NDVI         (date) float32 616B 0.1927 0.1838 0.1934 ... 0.2401 0.2125
xarray.Dataset
    • date: 154
    • band
      ()
      int64
      1
      array(1)
    • date
      (date)
      datetime64[ns]
      2001-01-14 ... 2022-01-24
      array(['2001-01-14T00:00:00.000000000', '2001-01-16T00:00:00.000000000',
             '2001-01-17T00:00:00.000000000', '2001-01-19T00:00:00.000000000',
             '2001-01-20T00:00:00.000000000', '2001-01-22T00:00:00.000000000',
             '2001-01-24T00:00:00.000000000', '2002-01-14T00:00:00.000000000',
             '2002-01-16T00:00:00.000000000', '2002-01-17T00:00:00.000000000',
             '2002-01-19T00:00:00.000000000', '2002-01-20T00:00:00.000000000',
             '2002-01-22T00:00:00.000000000', '2002-01-24T00:00:00.000000000',
             '2003-01-14T00:00:00.000000000', '2003-01-16T00:00:00.000000000',
             '2003-01-17T00:00:00.000000000', '2003-01-19T00:00:00.000000000',
             '2003-01-20T00:00:00.000000000', '2003-01-22T00:00:00.000000000',
             '2003-01-24T00:00:00.000000000', '2004-01-14T00:00:00.000000000',
             '2004-01-16T00:00:00.000000000', '2004-01-17T00:00:00.000000000',
             '2004-01-19T00:00:00.000000000', '2004-01-20T00:00:00.000000000',
             '2004-01-22T00:00:00.000000000', '2004-01-24T00:00:00.000000000',
             '2005-01-14T00:00:00.000000000', '2005-01-16T00:00:00.000000000',
             '2005-01-17T00:00:00.000000000', '2005-01-19T00:00:00.000000000',
             '2005-01-20T00:00:00.000000000', '2005-01-22T00:00:00.000000000',
             '2005-01-24T00:00:00.000000000', '2006-01-14T00:00:00.000000000',
             '2006-01-16T00:00:00.000000000', '2006-01-17T00:00:00.000000000',
             '2006-01-19T00:00:00.000000000', '2006-01-20T00:00:00.000000000',
             '2006-01-22T00:00:00.000000000', '2006-01-24T00:00:00.000000000',
             '2007-01-14T00:00:00.000000000', '2007-01-16T00:00:00.000000000',
             '2007-01-17T00:00:00.000000000', '2007-01-19T00:00:00.000000000',
             '2007-01-20T00:00:00.000000000', '2007-01-22T00:00:00.000000000',
             '2007-01-24T00:00:00.000000000', '2008-01-14T00:00:00.000000000',
             '2008-01-16T00:00:00.000000000', '2008-01-17T00:00:00.000000000',
             '2008-01-19T00:00:00.000000000', '2008-01-20T00:00:00.000000000',
             '2008-01-22T00:00:00.000000000', '2008-01-24T00:00:00.000000000',
             '2009-01-14T00:00:00.000000000', '2009-01-16T00:00:00.000000000',
             '2009-01-17T00:00:00.000000000', '2009-01-19T00:00:00.000000000',
             '2009-01-20T00:00:00.000000000', '2009-01-22T00:00:00.000000000',
             '2009-01-24T00:00:00.000000000', '2010-01-14T00:00:00.000000000',
             '2010-01-16T00:00:00.000000000', '2010-01-17T00:00:00.000000000',
             '2010-01-19T00:00:00.000000000', '2010-01-20T00:00:00.000000000',
             '2010-01-22T00:00:00.000000000', '2010-01-24T00:00:00.000000000',
             '2011-01-14T00:00:00.000000000', '2011-01-16T00:00:00.000000000',
             '2011-01-17T00:00:00.000000000', '2011-01-19T00:00:00.000000000',
             '2011-01-20T00:00:00.000000000', '2011-01-22T00:00:00.000000000',
             '2011-01-24T00:00:00.000000000', '2012-01-14T00:00:00.000000000',
             '2012-01-16T00:00:00.000000000', '2012-01-17T00:00:00.000000000',
             '2012-01-19T00:00:00.000000000', '2012-01-20T00:00:00.000000000',
             '2012-01-22T00:00:00.000000000', '2012-01-24T00:00:00.000000000',
             '2013-01-14T00:00:00.000000000', '2013-01-16T00:00:00.000000000',
             '2013-01-17T00:00:00.000000000', '2013-01-19T00:00:00.000000000',
             '2013-01-20T00:00:00.000000000', '2013-01-22T00:00:00.000000000',
             '2013-01-24T00:00:00.000000000', '2014-01-14T00:00:00.000000000',
             '2014-01-16T00:00:00.000000000', '2014-01-17T00:00:00.000000000',
             '2014-01-19T00:00:00.000000000', '2014-01-20T00:00:00.000000000',
             '2014-01-22T00:00:00.000000000', '2014-01-24T00:00:00.000000000',
             '2015-01-14T00:00:00.000000000', '2015-01-16T00:00:00.000000000',
             '2015-01-17T00:00:00.000000000', '2015-01-19T00:00:00.000000000',
             '2015-01-20T00:00:00.000000000', '2015-01-22T00:00:00.000000000',
             '2015-01-24T00:00:00.000000000', '2016-01-14T00:00:00.000000000',
             '2016-01-16T00:00:00.000000000', '2016-01-17T00:00:00.000000000',
             '2016-01-19T00:00:00.000000000', '2016-01-20T00:00:00.000000000',
             '2016-01-22T00:00:00.000000000', '2016-01-24T00:00:00.000000000',
             '2017-01-14T00:00:00.000000000', '2017-01-16T00:00:00.000000000',
             '2017-01-17T00:00:00.000000000', '2017-01-19T00:00:00.000000000',
             '2017-01-20T00:00:00.000000000', '2017-01-22T00:00:00.000000000',
             '2017-01-24T00:00:00.000000000', '2018-01-14T00:00:00.000000000',
             '2018-01-16T00:00:00.000000000', '2018-01-17T00:00:00.000000000',
             '2018-01-19T00:00:00.000000000', '2018-01-20T00:00:00.000000000',
             '2018-01-22T00:00:00.000000000', '2018-01-24T00:00:00.000000000',
             '2019-01-14T00:00:00.000000000', '2019-01-16T00:00:00.000000000',
             '2019-01-17T00:00:00.000000000', '2019-01-19T00:00:00.000000000',
             '2019-01-20T00:00:00.000000000', '2019-01-22T00:00:00.000000000',
             '2019-01-24T00:00:00.000000000', '2020-01-14T00:00:00.000000000',
             '2020-01-16T00:00:00.000000000', '2020-01-17T00:00:00.000000000',
             '2020-01-19T00:00:00.000000000', '2020-01-20T00:00:00.000000000',
             '2020-01-22T00:00:00.000000000', '2020-01-24T00:00:00.000000000',
             '2021-01-14T00:00:00.000000000', '2021-01-16T00:00:00.000000000',
             '2021-01-17T00:00:00.000000000', '2021-01-19T00:00:00.000000000',
             '2021-01-20T00:00:00.000000000', '2021-01-22T00:00:00.000000000',
             '2021-01-24T00:00:00.000000000', '2022-01-14T00:00:00.000000000',
             '2022-01-16T00:00:00.000000000', '2022-01-17T00:00:00.000000000',
             '2022-01-19T00:00:00.000000000', '2022-01-20T00:00:00.000000000',
             '2022-01-22T00:00:00.000000000', '2022-01-24T00:00:00.000000000'],
            dtype='datetime64[ns]')
    • spatial_ref
      ()
      int64
      0
      crs_wkt :
      GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
      semi_major_axis :
      6378137.0
      semi_minor_axis :
      6356752.314245179
      inverse_flattening :
      298.257223563
      reference_ellipsoid_name :
      WGS 84
      longitude_of_prime_meridian :
      0.0
      prime_meridian_name :
      Greenwich
      geographic_crs_name :
      WGS 84
      horizontal_datum_name :
      World Geodetic System 1984
      grid_mapping_name :
      latitude_longitude
      spatial_ref :
      GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
      GeoTransform :
      -112.306249989939 0.0020833333331466974 0.0 33.38749999700897 0.0 -0.0020833333331466974
      array(0)
    • NDVI
      (date)
      float32
      0.1927 0.1838 ... 0.2401 0.2125
      array([0.19271918, 0.18381515, 0.19337933, 0.20641223, 0.20527625,
             0.21355347, 0.2023626 , 0.16430517, 0.16619605, 0.17495242,
             0.17955731, 0.17866392, 0.1823953 , 0.19946143, 0.18366547,
             0.18079217, 0.1824482 , 0.18470453, 0.19153741, 0.1927115 ,
             0.19525106, 0.1695551 , 0.1649305 , 0.1697008 , 0.17696564,
             0.1837304 , 0.18533777, 0.18291627, 0.23815368, 0.22949411,
             0.23293541, 0.23651674, 0.23668632, 0.2530914 , 0.24353525,
             0.19812824, 0.1994614 , 0.20753407, 0.20031747, 0.21266024,
             0.22796558, 0.23437786, 0.17103131, 0.17005202, 0.17210265,
             0.17131774, 0.19878057, 0.19375874, 0.1949291 , 0.19657859,
             0.19268829, 0.19112544, 0.20378365, 0.20515397, 0.20518044,
             0.21880375, 0.17568262, 0.17510954, 0.17935917, 0.177187  ,
             0.17686719, 0.18209565, 0.1875214 , 0.18931653, 0.19258715,
             0.18992841, 0.20409796, 0.208838  , 0.21272281, 0.21206588,
             0.16482991, 0.16479243, 0.16797793, 0.174789  , 0.17950518,
             0.17687833, 0.18235481, 0.1622588 , 0.16400786, 0.1716131 ,
             0.1784849 , 0.18249896, 0.20188944, 0.19981584, 0.17132477,
             0.17269845, 0.17403664, 0.18281408, 0.18643816, 0.18640187,
             0.19743215, 0.18048275, 0.18183462, 0.18884079, 0.19112252,
             0.18813588, 0.19747396, 0.21965328, 0.2039193 , 0.20074563,
             0.20152187, 0.19695136, 0.20072898, 0.1971879 , 0.20494525,
             0.17555507, 0.17464142, 0.17789689, 0.17445002, 0.18335992,
             0.18904908, 0.18390723, 0.17723632, 0.17910889, 0.17861342,
             0.18629305, 0.20751695, 0.22941755, 0.21218234, 0.1677444 ,
             0.17593652, 0.16884573, 0.17662714, 0.18009892, 0.19361915,
             0.18507524, 0.21518232, 0.21642888, 0.2095887 , 0.21047847,
             0.21791482, 0.20616964, 0.20935465, 0.21053253, 0.20722507,
             0.20223613, 0.20523421, 0.20179982, 0.200584  , 0.2117647 ,
             0.18132074, 0.1810891 , 0.18956153, 0.18773848, 0.21792652,
             0.34324187, 0.26084283, 0.18015642, 0.18592848, 0.185401  ,
             0.18081154, 0.2009879 , 0.24013723, 0.21253909], dtype=float32)
    • date
      PandasIndex
      PandasIndex(DatetimeIndex(['2001-01-14', '2001-01-16', '2001-01-17', '2001-01-19',
                     '2001-01-20', '2001-01-22', '2001-01-24', '2002-01-14',
                     '2002-01-16', '2002-01-17',
                     ...
                     '2021-01-20', '2021-01-22', '2021-01-24', '2022-01-14',
                     '2022-01-16', '2022-01-17', '2022-01-19', '2022-01-20',
                     '2022-01-22', '2022-01-24'],
                    dtype='datetime64[ns]', name='date', length=154, freq=None))

Set Up DiD Analysis¶

A difference in difference OLS regression assumes we have similar trends across each dataset, which is largely true here due to proximity but is roughly tested below. Once similar trends can be established, effects from outside of shared trends should be easier to identify. More information on DiD can be found here.

In [21]:
# Modify arrays to dataframes and combine

gric_df = gric_mean['NDVI'].to_dataframe().reset_index()
sric_df = sric_mean['NDVI'].to_dataframe().reset_index()

gric_df['group'] = 'gric'
sric_df['group'] = 'sric'

df = pd.concat([gric_df, sric_df], ignore_index=True)

df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['doy'] = df['date'].dt.dayofyear
df['treated'] = (df['group'] == 'gric').astype(int)
df['post'] = (df['year'] >= 2005).astype(int)
df['did'] = df['treated'] * df['post']

df
Out[21]:
date band spatial_ref NDVI group year month doy treated post did
0 2001-01-14 1 0 0.192719 gric 2001 1 14 1 0 0
1 2001-01-16 1 0 0.183815 gric 2001 1 16 1 0 0
2 2001-01-17 1 0 0.193379 gric 2001 1 17 1 0 0
3 2001-01-19 1 0 0.206412 gric 2001 1 19 1 0 0
4 2001-01-20 1 0 0.205276 gric 2001 1 20 1 0 0
... ... ... ... ... ... ... ... ... ... ... ...
303 2022-01-17 1 0 0.203360 sric 2022 1 17 0 1 0
304 2022-01-19 1 0 0.194895 sric 2022 1 19 0 1 0
305 2022-01-20 1 0 0.208739 sric 2022 1 20 0 1 0
306 2022-01-22 1 0 0.245338 sric 2022 1 22 0 1 0
307 2022-01-24 1 0 0.238629 sric 2022 1 24 0 1 0

308 rows × 11 columns

In [22]:
# Clean up dataframe 

df = df.drop(columns=['band', 'spatial_ref'])
df = df.rename(columns={'NDVI': 'ndvi'})
df = df.reset_index(drop=True)

df
Out[22]:
date ndvi group year month doy treated post did
0 2001-01-14 0.192719 gric 2001 1 14 1 0 0
1 2001-01-16 0.183815 gric 2001 1 16 1 0 0
2 2001-01-17 0.193379 gric 2001 1 17 1 0 0
3 2001-01-19 0.206412 gric 2001 1 19 1 0 0
4 2001-01-20 0.205276 gric 2001 1 20 1 0 0
... ... ... ... ... ... ... ... ... ...
303 2022-01-17 0.203360 sric 2022 1 17 0 1 0
304 2022-01-19 0.194895 sric 2022 1 19 0 1 0
305 2022-01-20 0.208739 sric 2022 1 20 0 1 0
306 2022-01-22 0.245338 sric 2022 1 22 0 1 0
307 2022-01-24 0.238629 sric 2022 1 24 0 1 0

308 rows × 9 columns

In [23]:
# Plot mean yearly NDVI across both to compare trends prior to 2005

df_yearly = df.groupby(['year', 'group']).ndvi.mean().reset_index()

plt.figure(figsize=(10,6))

for g in ['gric', 'sric']:
    subset = df_yearly[df_yearly['group'] == g]
    plt.plot(subset['year'], subset['ndvi'], marker='o', label=g.upper())

# vertical line at 2005
plt.axvline(2005, color='k', linestyle='--', label='Post Settlement')

plt.title("Parallel Trends Check: NDVI on GRIC compared to SRIC")
plt.ylabel("Mean NDVI")
plt.xlabel("Year")
plt.legend()
plt.grid(True)
plt.savefig("img/ndvi_parallel_trends.png", dpi=300, bbox_inches="tight")
plt.show()
No description has been provided for this image
In [49]:
# Run DID model

did_model = smf.ols(
    formula="ndvi ~ treated*post + C(year) + C(month)",
    data=df
).fit()

print(did_model.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   ndvi   R-squared:                       0.584
Model:                            OLS   Adj. R-squared:                  0.550
Method:                 Least Squares   F-statistic:                     17.33
Date:                Wed, 10 Dec 2025   Prob (F-statistic):           1.55e-41
Time:                        20:36:14   Log-Likelihood:                 805.33
No. Observations:                 308   AIC:                            -1563.
Df Residuals:                     284   BIC:                            -1473.
Df Model:                          23                                         
Covariance Type:            nonrobust                                         
===================================================================================
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           0.2310      0.006     41.920      0.000       0.220       0.242
C(year)[T.2002]    -0.0213      0.007     -3.052      0.002      -0.035      -0.008
C(year)[T.2003]    -0.0055      0.007     -0.785      0.433      -0.019       0.008
C(year)[T.2004]    -0.0199      0.007     -2.855      0.005      -0.034      -0.006
C(year)[T.2005]     0.0385      0.005      8.022      0.000       0.029       0.048
C(year)[T.2006]     0.0120      0.005      2.491      0.013       0.003       0.021
C(year)[T.2007]    -0.0158      0.005     -3.298      0.001      -0.025      -0.006
C(year)[T.2008]     0.0141      0.005      2.941      0.004       0.005       0.024
C(year)[T.2009]    -0.0116      0.005     -2.417      0.016      -0.021      -0.002
C(year)[T.2010]     0.0104      0.005      2.162      0.031       0.001       0.020
C(year)[T.2011]    -0.0207      0.005     -4.319      0.000      -0.030      -0.011
C(year)[T.2012]    -0.0211      0.005     -4.390      0.000      -0.031      -0.012
C(year)[T.2013]    -0.0063      0.005     -1.310      0.191      -0.016       0.003
C(year)[T.2014]    -0.0022      0.005     -0.458      0.647      -0.012       0.007
C(year)[T.2015]    -0.0025      0.005     -0.517      0.606      -0.012       0.007
C(year)[T.2016]    -0.0183      0.005     -3.822      0.000      -0.028      -0.009
C(year)[T.2017]    -0.0036      0.005     -0.755      0.451      -0.013       0.006
C(year)[T.2018]    -0.0241      0.005     -5.030      0.000      -0.034      -0.015
C(year)[T.2019]     0.0085      0.005      1.780      0.076      -0.001       0.018
C(year)[T.2020]     0.0101      0.005      2.094      0.037       0.001       0.019
C(year)[T.2021]     0.0271      0.005      5.650      0.000       0.018       0.037
C(year)[T.2022]    -0.0029      0.005     -0.608      0.544      -0.012       0.007
treated            -0.0341      0.005     -6.916      0.000      -0.044      -0.024
post               -0.0086      0.005     -1.572      0.117      -0.019       0.002
treated:post        0.0085      0.005      1.561      0.120      -0.002       0.019
==============================================================================
Omnibus:                      190.755   Durbin-Watson:                   1.114
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             3348.597
Skew:                           2.173   Prob(JB):                         0.00
Kurtosis:                      18.558   Cond. No.                     5.76e+15
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 2.14e-29. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
In [50]:
# Save the model output

with open("img/did_model_summary.html", "w") as f:
    f.write(did_model.summary().as_html())

DiD Conclusion¶

Unfortunately, this type of analysis does not lead to very strong results, largely due to annual variability overshadowing any effects the increased flow might have had. Our primary variable is treated:post = 0.0085, which on a scale of 0-1 is a very small (although still positive!) impact when taking out similar trends between the datasets. I believe this is also partly due to the GRIC seeing increased development as time goes on. Having more data on the front end could help as well. By analyzing negative growth trends compared to street maps or similar information, my next step would be to mask over any increased development in both the GRIC and SRIC to isolate as much 'pure' vegetation data for analysis as possible, but I have not been able to work that out yet beyond estimation and eyeballing maps.¶

In [42]:
# Plot the two areas comparing mean values before and after 2005 for overview

# GRIC NDVI
gric_pre = gric_ndvi.sel(date=slice("2001-01-01", "2004-12-31")).NDVI.mean(dim='date')
gric_post = gric_ndvi.sel(date=slice("2005-01-01", "2022-12-31")).NDVI.mean(dim='date')
gric_diff = gric_post - gric_pre

# SRIC NDVI (control)
sric_pre = sric_ndvi.sel(date=slice("2001-01-01", "2004-12-31")).NDVI.mean(dim='date')
sric_post = sric_ndvi.sel(date=slice("2005-01-01", "2022-12-31")).NDVI.mean(dim='date')
sric_diff = sric_post - sric_pre

# Plot GRIC pre/post NDVI change
plt.figure(figsize=(5,4))
gric_diff.plot(cmap='RdYlGn', vmin=-0.05, vmax=0.05)
plt.title('GRIC NDVI change post-2005 (mean difference)')
plt.savefig("img/gric_change.png", dpi=300, bbox_inches="tight")
plt.show()

# Plot SRIC pre/post NDVI change
plt.figure(figsize=(5,4))
sric_diff.plot(cmap='RdYlGn', vmin=-0.05, vmax=0.05)
plt.title('SRIC NDVI change post-2005 (mean difference)')
plt.savefig("img/sric_change.png", dpi=300, bbox_inches="tight")
plt.show()
No description has been provided for this image
No description has been provided for this image

Plot Running Average Against Pre-Settlement Mean¶

I have not figured out how to implement SRIC data as a control on a raster level, but plottting each years running average from 2005 compared to mean values prior can be helpful in identifying areas of increased and decreased NDVI data that would be worth looking into further.

In [51]:
# Calculate mean values prior to the settlement

gric_ndvi_pre = (gric_ndvi
             .sel(date=slice('2001', '2004'))
             .mean('date')
             .NDVI
)

# Calculate annual mean values post settlement
gric_ndvi_post = gric_ndvi.sel(date=slice('2005', '2022')).NDVI

gric_post_yearly = gric_ndvi_post.groupby('date.year').mean('date')

# Confirm grouping correctly
len(gric_post_yearly)
Out[51]:
18
In [28]:
# Calculate difference between each year post and mean prior

gric_diff = gric_post_yearly - gric_ndvi_pre

print(gric_diff.dims)
print(gric_diff.coords)
('year', 'y', 'x')
Coordinates:
    band         int64 8B 1
  * x            (x) float64 3kB -112.3 -112.3 -112.3 ... -111.5 -111.5 -111.5
  * y            (y) float64 2kB 33.39 33.38 33.38 33.38 ... 32.97 32.97 32.97
    spatial_ref  int64 8B 0
  * year         (year) int64 144B 2005 2006 2007 2008 ... 2019 2020 2021 2022
In [31]:
# Plot the difference

diff_plot = (
    gric_diff.hvplot(x='x', y='y', cmap='RdYlGn', geo=True, groupby='year',
                     clim=(-0.5, 0.5),   
                     title = 'Gila River Indian Community NDVI\n'
                     'Comparing 2001-2004 to Following Years')
    *
    gric_gdf.hvplot(geo=True, fill_color=None, line_color='black')
)

diff_plot
Out[31]:
BokehModel(combine_events=True, render_bundle={'docs_json': {'e9c377f4-bb84-4fea-81f6-831fb75b2eac': {'version…
In [32]:
# Move the slider to the bottom of the plot and force 2005 start

years = list(diff_plot.kdims[0].values)
years.sort()     # just to be safe

diff_plot_bottom = pn.panel(
    diff_plot,
    widget_location='bottom',
    widgets={
        'year': pn.widgets.DiscreteSlider(
            name='year',
            options=years,
            value=years[0]
        )
    }
)

diff_plot_bottom
Out[32]:
BokehModel(combine_events=True, render_bundle={'docs_json': {'ecbbcbf4-6c17-4aca-b2db-31f9c4152d52': {'version…
In [41]:
# Save plot for comparison

diff_plot_bottom.save('img/gric_diff_plot.html', embed=True)
                                               
WARNING:bokeh.core.validation.check:W-1005 (FIXED_SIZING_MODE): 'fixed' sizing mode requires width and height to be set: figure(id='4137e24d-9ed0-4aa1-ac44-5883760b4de1', ...)
In [34]:
# Calculate cumulative sum for each year from settlement

cumulative_sum = gric_post_yearly.cumsum('year')

len(cumulative_sum)
Out[34]:
18
In [35]:
# Calculate year count for each year post settlement

year_count = xr.DataArray(
    range(1, len(gric_post_yearly.year) + 1),
    coords=[gric_post_yearly.year],
    dims=['year']
)

len(year_count)
Out[35]:
18
In [36]:
# Calculate average for each year

gric_run_avg = cumulative_sum / year_count

len(gric_run_avg)
Out[36]:
18
In [37]:
# Calculate average difference from pre-settlement mean

new_gric_diff = gric_run_avg - gric_ndvi_pre

print(new_gric_diff.dims)
print(new_gric_diff.coords)
('year', 'y', 'x')
Coordinates:
    band         int64 8B 1
  * x            (x) float64 3kB -112.3 -112.3 -112.3 ... -111.5 -111.5 -111.5
  * y            (y) float64 2kB 33.39 33.38 33.38 33.38 ... 32.97 32.97 32.97
    spatial_ref  int64 8B 0
  * year         (year) int64 144B 2005 2006 2007 2008 ... 2019 2020 2021 2022
In [39]:
# Plot the new running average
 
new_diff_plot = (
    new_gric_diff.hvplot(x='x', y='y', cmap='RdYlGn', geo=True, groupby='year',
                     clim=(-0.5, 0.5),   
                     title = 'Gila River Indian Community NDVI\n'
                     'Comparing 2001-2004 to Average of Following Years')
    *
    gric_gdf.hvplot(geo=True, fill_color=None, line_color='black')
)

years = list(diff_plot.kdims[0].values)
years.sort()

# Move slider to the bottom and force correct start
new_diff_plot_bottom = pn.panel(
    new_diff_plot,
    widget_location='bottom',
    widgets={
        'year': pn.widgets.DiscreteSlider(
            name='year',
            options=years,
            value=years[0]
        )
    }
)

new_diff_plot_bottom
Out[39]:
BokehModel(combine_events=True, render_bundle={'docs_json': {'15792bcd-eea6-4856-b4f2-f6dff4474570': {'version…
In [40]:
# Save the new plot

new_diff_plot_bottom.save('img/gric_avg_diff_plot.html', embed=True)
                                               
WARNING:bokeh.core.validation.check:W-1005 (FIXED_SIZING_MODE): 'fixed' sizing mode requires width and height to be set: figure(id='d80134e9-4371-46cf-bd91-a5df779d3367', ...)

Running Average Conclusion¶

Both of these plots show that in most areas there has not been substantial change in NDVI levels over time, and areas of change in both directions tend to be concentrated together. The running average plot in particular highlights areas that have shown consistent change over time in the same direction and visually supports one of my DiD results interpretations that development and areas of intentionally negative NDVI change have offset areas of high growth, which can be seen most clearly in the middle of the territory where development on the southern border and agriculture on the northern border have increased.¶